bbdc473c3133edd8112134db5dd083fc96b286f0,src/main/java/com/datumbox/examples/Classification.java,Classification,main,#String[]#,57

Before Change


        //Register the "test result" header with the CATEGORICAL data type before the file is parsed
        headerDataTypes.put("test result", TypeInference.DataType.CATEGORICAL);
        
        
        //Parse the file into the training dataset; '\t', '"' and "\r\n" are passed as the parsing characters.
        //NOTE(review): "test result" is presumably the name of the response (Y) column - confirm against parseCSVFile
        Dataset trainingDataset = Dataset.Builder.parseCSVFile(fileReader, "test result", headerDataTypes, '\t', '"', "\r\n", dbConf);
        //The testing dataset is created by copying the training dataset; no separate hold-out file is loaded here
        Dataset testingDataset = trainingDataset.copy();
        
        
        //Transform Dataset
        //-----------------
        
        //Normalize continuous variables
        //The normalizer is fitted on the training dataset only; the testing dataset is passed through transform() further below
        XMinMaxNormalizer dataTransformer = new XMinMaxNormalizer("Diabetes", dbConf);
        dataTransformer.fit_transform(trainingDataset, new XMinMaxNormalizer.TrainingParameters());
        


        //Feature Selection
        //-----------------
        
        //Perform dimensionality reduction using PCA
        
        PCA featureSelection = new PCA("Diabetes", dbConf);
        PCA.TrainingParameters featureSelectionParameters = new PCA.TrainingParameters();
        //Cap the number of dimensions at one fewer than the dataset's current variable count
        featureSelectionParameters.setMaxDimensions(trainingDataset.getVariableNumber()-1); //remove one dimension
        featureSelectionParameters.setWhitened(false);
        featureSelectionParameters.setVariancePercentageThreshold(0.99999995);
        //As with the normalizer, PCA is fitted on the training dataset only
        featureSelection.fit_transform(trainingDataset, featureSelectionParameters);
        
        
        
        //Fit the classifier
        //------------------
        
        SoftMaxRegression classifier = new SoftMaxRegression("Diabetes", dbConf);
        
        //Train with 200 iterations and a learning rate of 0.1
        SoftMaxRegression.TrainingParameters param = new SoftMaxRegression.TrainingParameters();
        param.setTotalIterations(200);
        param.setLearningRate(0.1);
        
        classifier.fit(trainingDataset, param);
        
        //Denormalize trainingDataset (optional)
        //NOTE(review): trainingDataset has already been through the PCA fit_transform at this point - confirm denormalize() is meant to run on it in that state
        dataTransformer.denormalize(trainingDataset);
        
        
        //Use the classifier
        //------------------
        
        //Apply the same data transformations on testingDataset 
        dataTransformer.transform(testingDataset);
        
        //Apply the same featureSelection transformations on testingDataset
        featureSelection.transform(testingDataset);
        
        //Get validation metrics on the training set
        //(testingDataset was created above as a copy of trainingDataset)
        SoftMaxRegression.ValidationMetrics vm = classifier.validate(testingDataset);
        classifier.setValidationMetrics(vm); //store them in the model for future reference
        
        //Denormalize testingDataset (optional)
        dataTransformer.denormalize(testingDataset);
        
        //Print the real and the predicted Y of every record in testingDataset
        System.out.println("Results:");
        for(Integer rId: testingDataset) {
            Record r = testingDataset.get(rId);
            System.out.println("Record "+rId+" - Real Y: "+r.getY()+", Predicted Y: "+r.getYPredicted());
        }
        
        //Print the validation metrics object through PHPfunctions.var_export
        System.out.println("Classifier Statistics: "+PHPfunctions.var_export(vm));
        
        
        
        //Clean up
        //--------
        
        //Erase data transformer, featureselector and classifier.
        //NOTE(review): as far as visible here the clean up is not inside a finally block, so it would be skipped if an earlier call throws - confirm
        dataTransformer.erase();
        featureSelection.erase();
        classifier.erase();
        
        //Erase datasets.
        trainingDataset.erase();
        testingDataset.erase();
    }
    
}

After Change


            headerDataTypes.put("test result", TypeInference.DataType.CATEGORICAL);


            trainingDataframe = Dataframe.Builder.parseCSVFile(fileReader, "test result", headerDataTypes, '\t', '"', "\r\n", dbConf);
        }
        catch(UncheckedIOException | IOException | URISyntaxException ex) {
            throw new RuntimeException(ex);
        }
        //The testing dataframe is created by copying the training dataframe; no separate hold-out file is loaded here
        Dataframe testingDataframe = trainingDataframe.copy();
        
        
        //Transform Dataframe
        //-----------------
        
        //Normalize continuous variables
        //The normalizer is fitted on the training dataframe only; the testing dataframe is passed through transform() further below
        XMinMaxNormalizer dataTransformer = new XMinMaxNormalizer("Diabetes", dbConf);
        dataTransformer.fit_transform(trainingDataframe, new XMinMaxNormalizer.TrainingParameters());
        


        //Feature Selection
        //-----------------
        
        //Perform dimensionality reduction using PCA
        
        PCA featureSelection = new PCA("Diabetes", dbConf);
        PCA.TrainingParameters featureSelectionParameters = new PCA.TrainingParameters();
        //Cap the number of dimensions at one fewer than the value returned by xColumnSize()
        featureSelectionParameters.setMaxDimensions(trainingDataframe.xColumnSize()-1); //remove one dimension
        featureSelectionParameters.setWhitened(false);
        featureSelectionParameters.setVariancePercentageThreshold(0.99999995);
        //As with the normalizer, PCA is fitted on the training dataframe only
        featureSelection.fit_transform(trainingDataframe, featureSelectionParameters);
        
        
        
        //Fit the classifier
        //------------------
        
        SoftMaxRegression classifier = new SoftMaxRegression("Diabetes", dbConf);
        
        //Train with 200 iterations and a learning rate of 0.1
        SoftMaxRegression.TrainingParameters param = new SoftMaxRegression.TrainingParameters();
        param.setTotalIterations(200);
        param.setLearningRate(0.1);
        
        classifier.fit(trainingDataframe, param);
        
        //Denormalize trainingDataframe (optional)
        //NOTE(review): trainingDataframe has already been through the PCA fit_transform at this point - confirm denormalize() is meant to run on it in that state
        dataTransformer.denormalize(trainingDataframe);
        
        
        //Use the classifier
        //------------------
        
        //Apply the same data transformations on testingDataframe 
        dataTransformer.transform(testingDataframe);
        
        //Apply the same featureSelection transformations on testingDataframe
        featureSelection.transform(testingDataframe);
        
        //Get validation metrics on the training set
        //(testingDataframe was created above as a copy of trainingDataframe)
        SoftMaxRegression.ValidationMetrics vm = classifier.validate(testingDataframe);
        classifier.setValidationMetrics(vm); //store them in the model for future reference
        
        //Denormalize testingDataframe (optional)
        dataTransformer.denormalize(testingDataframe);
        
        //Print the real and the predicted Y of every (id, record) entry in testingDataframe
        System.out.println("Results:");
        for(Map.Entry<Integer, Record> entry: testingDataframe.entries()) {
            Integer rId = entry.getKey();
            Record r = entry.getValue();
            System.out.println("Record "+rId+" - Real Y: "+r.getY()+", Predicted Y: "+r.getYPredicted());
        }
        
        //Print the validation metrics object through PHPfunctions.var_export
        System.out.println("Classifier Statistics: "+PHPfunctions.var_export(vm));
        
        
        
        //Clean up
        //--------
        
        //Erase data transformer, featureselector and classifier.
        //NOTE(review): as far as visible here the clean up is not inside a finally block, so it would be skipped if an earlier call throws - confirm
        dataTransformer.delete();
        featureSelection.delete();
        classifier.delete();
        
        //Erase Dataframes.
        trainingDataframe.delete();
        testingDataframe.delete();
    }
    
}